#' Add middle-school codes and GPS coordinates to the high-school admission data
#'
#' Reads `data_adm_hs_siiir_<year>.rds` from `wd_data_intermediate`, assigns each
#' middle school of origin to a county, fuzzy-matches the school names against
#' the government school register (`schools.xlsx` joined with
#' `20170327-coordonategps-scoli.xlsx`) county by county, and merges the matched
#' codes and coordinates back onto the student-level data. The merged data is
#' also written to `data_adm_hs_ms_siiir_<year>.rds` in `wd_data_intermediate`.
#'
#' Depends on the global paths `wd_data_intermediate`, `wd_code` and
#' `wd_data_raw_other`, on the scripts `get_town_adm_ms.R` and
#' `clean_codes_govt.R`, and on the attached packages that provide the dplyr
#' verbs, `str_extract`, `stri_extract_all_regex`, `stringdist`,
#' `stringdist_left_join` and `read_excel`.
#'
#' NOTE: the working directory and the locale are changed and not restored;
#' on return the working directory is `wd_data_intermediate`.
#'
#' @param year Admission year; used to build the input and output file names.
#' @return An unnamed list of two data frames: (1) the admission data with
#'   `judet_ms`, `Cod_SIIIR_ms_adm`, `Cod_SIRUES_ms_adm`, `Cod_SIRUTA_ms_adm`,
#'   `lat_ms_adm` and `lon_ms_adm` added, and (2) the school-level code table
#'   that was merged in.
add_codes_ms_adm <- function(year) {
  # ---------------------------------------------------------------- load ----
  setwd(wd_data_intermediate)
  admission_file <- paste0("data_adm_hs_siiir_", year, ".rds")
  data_full <- readRDS(admission_file) %>% ungroup()
  message(paste('Observations before:', nrow(data_full)))

  # ------------------------------------------- county of the middle school ----
  # The admission data only records the county of the high school. A middle
  # school is assumed to be in the county that receives most of its students:
  # rows are sorted by descending student count within each school, and the
  # first county is taken.
  ms_county <- data_full %>%
    filter(scoala_de_provenienta != '') %>%
    group_by(an_adm, judet_adm, scoala_de_provenienta_orig, scoala_de_provenienta) %>%
    summarize(n = n()) %>%
    arrange(scoala_de_provenienta) %>%
    group_by(scoala_de_provenienta) %>%
    arrange(scoala_de_provenienta, -n) %>%
    mutate(judet_ms = judet_adm[1]) %>%
    select(judet_adm, scoala_de_provenienta, scoala_de_provenienta_orig, judet_ms)

  county_key <- c("judet_adm", "scoala_de_provenienta", "scoala_de_provenienta_orig")
  data_full <- base::merge(data_full, ms_county, by.x = county_key, by.y = county_key, all.x = TRUE)

  # One row per (year, original name, cleaned name, county) with a student count.
  data <- data_full %>%
    group_by(an_adm, scoala_de_provenienta_orig, scoala_de_provenienta, judet_ms) %>%
    tally(name = "n_students") %>%
    ungroup()

  # ---------------------------------------------------------------- towns ----
  data$town_ms_adm <- NA
  setwd(wd_code)
  setwd('./Codes/Adm MS SIIIR Codes/')
  print('Retrieving towns...')
  # Evaluated in this function's frame, so get_town_adm_ms() is defined locally.
  eval(parse('get_town_adm_ms.R', encoding = 'UTF-8'))
  data <- get_town_adm_ms(data)
  # Number of distinct schools per (county, town); used for the small-town
  # fallback below. The grouping is deliberately left in place, as before.
  data <- data %>%
    group_by(judet_ms, town_ms_adm) %>%
    mutate(n_schools = length(unique(scoala_de_provenienta)))

  print('Adding Codes...')

  # ------------------------------------------------------- regex patterns ----
  # Prefixes that identify a plain middle school.
  school_words<-c('SCOALA CU CLASELE I-VIII |SCOALA GIMNAZIALA |SCOALA GENERALA CLASELE I-VIII| SCOALA CLASELE I-VIII |SCOALA GENERALA |SCOALA CU CL\\. I-VIII\\. |SCOALA CU CL\\. I-VIII ')
  # Generic words removed from a school name to isolate its distinctive part.
  ms_words<-paste0("TEHNOLOGIC DE TURISM SI ALIMENTATIE|CU CLASELE|I-XIII|I-VIII|I-X|SCOALACU CLS\\.|CU CL\\.|CL\\.|COLEGIUL TEHNIC|MARMATIA|AL BANATULUI MONTAN|",
                   "PARTICULAR|DE ARTA|DE INDUSTRIE ALIMENTARA|DEFICIENTI DE VEDERE|BILINGV ROMINO-CROAT|",
                   "DE CONSTRUCTII SI PROTECTIA MEDIULUI|PROFESIONALA DE COOPERATIE|",
                   "DE PROTECTIA MEDIULUI |WALDORF|FEG|DE VEST|",
                   "PENTRU EDUCATIE INCLUZIVA|DE ARTE SI MESERII|GRUPUL SCOLAR|",
                   "CONSTR. CAI FERATE|DE CHIMIE INDUSTRIALA|",
                   "CONSTRUCTII MONTAJ|INDUSTRIAL MINIER|MINIER|",
                   "DE COOPERATIE|INDUSTRIE USOARA|ECONOMIC-ADMINISTRATIV|",
                   "MESERII SI SERVICII|INDUSTRIE MICA SI SERVICII|NATIONALA DE GAZ|",
                   "DE MUZICA SI ARTE PLASTICE|CU PROGRAM DE ATLETISM|DE MUZICA|",
                   "ADMINISTRATIV SI DE SERVICII|ALIMENTATIE PUBLICA|CU PROGRAM SPORTIV|",
                   "FORESTIER|INDUSTRIA STICLEI|COOPERATIST|COOPERATIE|",
                   "DE MARINA|DE AGRICULTURA SI ECONOMIE|AGRICULTURA SI INDUSTRIE ALIMENTARA|PENTRU|AGRICULTURA|INDUSTRIE ALIMENTARA|CONSTRUCTII CAI FERATE|",
                   "DE TRANSPORTURI AUTO|BILINGV ROMANO-CROAT|TEHNIC DE TRANSPORTURI|",
                   "SPECIAL PENTRU DEFICIENTI DE AUZ|ADVENTIST|DEFICIENTI DE VEDERE|TEHNIC|DE MATERIAL RULANT|TRANSPORTURI FEROVIARE|DEFICIENTI DE AUZ|SPECIAL|",
                   "TRANSPORTURI CAI FERATE|VOCATIONAL DE ARTA|ROMANO-CATOLIC|SPECIAL|AUTO |",
                   "SILVIC|TEHNIC|CONSTRUCTII DE MASINI|METALURGIC|PETROL|",
                   "BANATEAN|CARASAN|ALIMENTARA|PENTICOSTAL|BAPTIST|AGRICOL|DE AFACERI|",
                   "REFORMAT|TEOLOGIC|ORTODOX|TEHNOLOGIC FORESTIER|TEHNOLOGIC CONSTRUCTII DE MASINI|",
                   "SPORTIV|SANITAR|ENERGETIC|TEHNOLOGIC|PEDAGOGIC|TEORETIC|DE AGROTURISM|COMERCIAL|AGRICULTURA ECOLOGICA|",
                   "DE INFORMATICA|ECONOMIC|TRANSPORTURI|ADMINISTRATIV|DE TURISM|PENTRU TRANSPORT RUTIER|",
                   "MAGHIARA|TELECOMUNICATII|DE ARTE|CONSTRUCTII|SERVICII|AUTOMECANICA|NATIONAL|",
                   "FEROVIAR|TEXTIL|GR\\. SC\\.|CENTRUL SCOLAR|GRUP SCOLAR INDUSTRIAL|",
                   "THE CAMBRIDGE INTERNATIONAL SCHOOL IN|DIN |GRI|GRUP SCOLAR|GRUPUL SCOLAR INDUSTRIAL|GRUPUL SCOLAR|LICEUL|",
                   " N |NR |NR\\.|NR|NUMARUL|DE CONSTRUCTII-MONTAJ|COMPLEXUL SCOLAR|",
                   "SCOLAR|MONAHAL|DE COOPERATIE|MILITAR|AGROMONTAN|DE ECOLOGIE SI PROTECTIA MEDIULUI|NAVAL|PROTECTIA MEDIULUI|INDUSTRIA STICLEI|INDUSTRIE MICA|SERVICII|SI SERVICII|",
                   "CENTRU DE STUDII|CENTRUL DE STUDII|DE CHIMIE|COM\\.|MUN\\.|MUNICIPIUL|ORAS|AUTO|DE ECOLOGIE SI PROT\\. MED\\.|TEHNOLOGIC|",
                   "GRUPUL SCOLARDE ECOLOGIE SI PROT\\. MED\\.|DE AGROTURISM|POSTA|AGROTURISM|DE POSTA SI TELECOMUNICATII| DE CONSTRUCTII|\\(|\\)|",
                   "COLEGIUL|SEMINARUL|SI PROTECTIA MEDIULUI|AUTOMATIZARI|ELECTRONICA|DE ELECTRONICA SI AUTOMATIZARI|UCECOM|ECOLOGIC|",
                   "DE COMUNICATII|GRECO-CATOLIC|AGROINDUSTRIAL|AGROMONTAN|SCOALA|",
                   "ORASUL|ENERGETIC|CLASELE|I-VIII|COMUNA|TEHNOLOGIC|MESERII|BILINGV|GRUPUL|",
                   "PENTRU|SC\\. GEN\\.|LICEAL|LOCALITATEA|GERMAN|LICEUL|",
                   "SCOALA CU CLASELE I-VIII|SCOALA GENERALA CLASELE I-VIII|SCOALA GENERALA CU CLASELE I-VIII|SCOALA CU CL\\. I-VIII|SCOALA CLASELE I-VIII|SCOALA GENERALA|SCOALA CU CL\\. I-VIII\\.|SCOALA CU CL\\. I-VIII|SCOALA GIMNAZIALA|SCOALA")

  # Keyword patterns used to flag the type of a school from its name.
  # ("ADEVNTIST" in the theology pattern was a typo and never matched.)
  type_patterns <- list(
    technical = "(TEHNIC)|(TEHNOLOGIC)|(INDUSTRIAL)|(AGRICOL)|(SILVIC)|(UCECOM)|(PROFESIONALA)|(ECONOMIC)|(GRUP SCOLAR)|(GRUPUL SCOLAR)|(GR\\. SC\\.)|(CONSTRUCTII)",
    theory    = "(TEORETIC)|(PEDAGOGIC)|(COLEGIUL NATIONAL)",
    theology  = "(MONAHAL)|(TEOLOGIC)|(SEMINAR)|(ORTODOX)|(REFORMAT)|(CATOLIC)|(GRECO)|(CRESTIN)|(PENTICOSTAL)|(ADVENTIST)|(BAPTIST)",
    sports    = "(SPORTIV)|(ATLETISM)|(SPORT)",
    private   = "(PARTICULAR)|(PRIVAT)",
    arts      = "( DANS)|(COREGRAFIE)|(MUZICA)|(DE ARTA)|(DE ARTE)|(PLASTICE)|(PLASTICA)|(ARTISTIC)",
    language  = "(CROAT)|(BILINGV)|(MAGHIAR)|(GERMAN)"
  )

  # -------------------------------------------------------- local helpers ----
  # Drop punctuation, collapse runs of whitespace and trim.
  strip_punct_ws <- function(x) {
    x <- gsub("[[:punct:]]", "", x)
    trimws(gsub("\\s+", " ", x, perl = TRUE))
  }

  # Drop dots and commas, turn NA into "", then strip punctuation/whitespace.
  tidy_name <- function(x) {
    x <- trimws(gsub('\\.', '', x, perl = TRUE))
    x <- gsub(',', '', x, perl = TRUE)
    x <- ifelse(is.na(x), "", x)
    strip_punct_ws(x)
  }

  # Derive the name variants and type flags used for matching. The same
  # derivation is applied to the admission data and to the register so that
  # both sides stay comparable. `name_col` / `town_col` are column names.
  derive_name_features <- function(df, name_col, town_col) {
    df %>%
      mutate(school_number = str_extract(.data[[name_col]], '(?<=[NR ])[0-9]+')) %>%
      # name without generic school words
      mutate(school_name_stripped = tidy_name(gsub(ms_words, "", .data[[name_col]]))) %>%
      # the town is used as a regex pattern, hence row by row
      rowwise %>%
      mutate(
        school_name_stripped_no_town = trimws(gsub(.data[[town_col]], "", school_name_stripped)),
        school_name_no_town = trimws(gsub(.data[[town_col]], "", .data[[name_col]]))
      ) %>%
      ungroup %>%
      # text between double quotes
      mutate(school_name_quote = stri_extract_all_regex(.data[[name_col]], '(?<=").*?(?=")')) %>%
      mutate(school_name_quote = tidy_name(school_name_quote)) %>%
      # name without the quoted part and without generic school words
      mutate(school_name_minimal = gsub('\"[^\"]+\"', '', .data[[name_col]], perl = TRUE)) %>%
      mutate(school_name_minimal = gsub(ms_words, '', school_name_minimal, perl = TRUE)) %>%
      mutate(school_name_minimal = strip_punct_ws(school_name_minimal)) %>%
      # abbreviation derived from the quoted name
      mutate(school_name_abbrev = gsub('((?<!\\b)[^\\s](?=[a-zA-Z]+[\\s]|[\\s])|\\.)', '', school_name_quote, perl = TRUE)) %>%
      mutate(school_name_abbrev = strip_punct_ws(school_name_abbrev)) %>%
      # full name without punctuation
      mutate(school_name_punct = strip_punct_ws(.data[[name_col]])) %>%
      # type flags
      mutate(
        technical = grepl(type_patterns$technical, .data[[name_col]]),
        theory    = grepl(type_patterns$theory, .data[[name_col]]),
        theology  = grepl(type_patterns$theology, .data[[name_col]]),
        sports    = grepl(type_patterns$sports, .data[[name_col]]),
        private   = grepl(type_patterns$private, .data[[name_col]]),
        arts      = grepl(type_patterns$arts, .data[[name_col]]),
        middle    = grepl(school_words, .data[[name_col]]),
        language  = grepl(type_patterns$language, .data[[name_col]]),
        is_school = grepl(school_words, .data[[name_col]])
      )
  }

  # One fuzzy-matching pass, run county by county: join `candidates` to the
  # register on string distance between `key_ms` and `key_gps`, keep the
  # closest register row per school, and drop matches with distance >= 4.
  # Reads `ms` and `gps_data_merged` from the enclosing function.
  fuzzy_match_pass <- function(candidates, key_ms, key_gps, max_dist, skip_blank = TRUE) {
    per_county <- lapply(unique(ms$judet_ms), function(county) {
      lms <- candidates %>% filter(judet_ms == county)
      rms <- gps_data_merged %>% filter(judet.gps == county)
      if (skip_blank) {
        lms <- lms %>% filter(.data[[key_ms]] != "")
        rms <- rms %>% filter(.data[[key_gps]] != "")
      }
      stringdist_left_join(lms, rms, by = setNames(key_gps, key_ms),
                           distance_col = "dist", max_dist = max_dist) %>%
        group_by(scoala_de_provenienta) %>%
        arrange(dist) %>%
        slice(1)
    })
    bind_rows(per_county) %>% filter(dist < 4) %>% arrange(-dist)
  }

  # ------------------------------------------- admission-side school table ----
  ms <- derive_name_features(data, "scoala_de_provenienta", "town_ms_adm")

  # ------------------------------------------------- government register ----
  setwd(wd_data_raw_other)

  # School register: one row per (county, SIIIR code) for the listed levels.
  school_data <- read_excel('schools.xlsx') %>%
    group_by(Judet, `Cod SIIIR`) %>%
    filter(Nivel %in% c('Primar', 'Gimnazial', 'Liceal', 'Profesional', 'Postliceal')) %>%
    slice(1) %>%
    ungroup()
  colnames(school_data)[names(school_data) == "Cod SIIIR"] <- 'Cod_SIIIR'
  colnames(school_data)[names(school_data) == "Cod SIRUES"] <- 'Cod_SIRUES'
  colnames(school_data)[names(school_data) == "Cod siruta"] <- 'Cod_SIRUTA'
  school_data$Cod_SIIIR <- as.character(school_data$Cod_SIIIR)
  # Codes shorter than 10 characters get one "0" prepended.
  school_data <- school_data %>%
    mutate(Cod_SIIIR = ifelse(nchar(Cod_SIIIR) < 10, paste0("0", Cod_SIIIR), Cod_SIIIR))

  # GPS coordinates, joined on the SIIIR code; schools without a latitude are dropped.
  geo_data <- read_excel('20170327-coordonategps-scoli.xlsx')
  gps_data_merged <- base::merge(school_data, geo_data, by.x = c('Cod_SIIIR'), by.y = c('Cod_SIIIR'), all.x = TRUE)
  gps_data_merged <- gps_data_merged %>%
    select(Judet, Denumire, Localitate, LAT, LONG, Cod_SIIIR, Cod_SIRUES, Cod_SIRUTA) %>%
    rename(judet.gps = Judet, school.gps = Denumire, town.gps = Localitate, lat.gps = LAT, lon.gps = LONG) %>%
    filter(!is.na(lat.gps))

  # Clean special characters in the register fields.
  Sys.setlocale(locale="Romanian")
  setwd(wd_code)
  setwd("./Codes/")
  source('clean_codes_govt.R')
  gps_data_merged <- clean_govt(gps_data_merged)
  gps_data_merged <- as.data.frame((gps_data_merged))
  gps_data_merged <- derive_name_features(gps_data_merged, "school.gps", "town.gps")

  # Remove duplicates; some schools have 2 locations, but these are typically
  # very close to one another. (The result stays grouped, as before.)
  gps_data_merged <- gps_data_merged %>%
    group_by(judet.gps, school.gps) %>%
    slice(1)

  # ----------------------- canonicalise admission towns to register towns ----
  # Within a county, a town in the admission data that contains a register
  # town name (as a regex) is replaced by that register town name.
  towns <- gps_data_merged %>% group_by(judet.gps, town.gps) %>% summarize()
  for (county in unique(towns$judet.gps)) {
    towns_temp <- towns %>% filter(judet.gps == county)
    for (town in towns_temp$town.gps) {
      ms <- ms %>%
        ungroup() %>%
        mutate(town_ms_adm = ifelse(judet_ms == county & grepl(town, town_ms_adm), town, town_ms_adm))
    }
  }

  # -------------------------------------------------- fuzzy name matching ----
  match_key <- c("judet_ms", "scoala_de_provenienta")

  # Pass 1: full school name against the full register name (looser distance).
  ms_matched <- fuzzy_match_pass(ms, "scoala_de_provenienta", "school.gps",
                                 max_dist = 5, skip_blank = FALSE)
  ms_unmatched <- anti_join(ms, ms_matched, by = match_key)

  # Passes 2-5: progressively different name variants, only for schools that
  # are still unmatched and only where the variant is non-empty on both sides.
  fallback_keys <- c("school_name_abbrev", "school_name_stripped",
                     "school_name_stripped_no_town", "school_name_no_town")
  for (key in fallback_keys) {
    ms_matched <- bind_rows(ms_matched, fuzzy_match_pass(ms_unmatched, key, key, max_dist = 2))
    ms_unmatched <- anti_join(ms, ms_matched, by = match_key)
  }

  # Register schools not used by any match so far. The join key is given
  # explicitly: the previous `by.x`/`by.y` arguments (one of them misspelled)
  # are not anti_join() arguments and are rejected by recent dplyr versions.
  gps_data_merged_unmatched <- anti_join(gps_data_merged, ms_matched, by = c("judet.gps", "school.gps"))

  # --------------------------------------------- fallback for small towns ----
  # A school that is the only one in its town is paired with every unmatched
  # register school whose town name is within string distance 3.
  # NOTE(review): this is a cross join, so a school can receive several
  # register rows, which would duplicate observations in the final merge -
  # compare the "Observations before/after" messages.
  ms_matched_temp <- lapply(unique(ms$judet_ms), function(county) {
    gps_initials <- unique(str_extract(
      gps_data_merged_unmatched[gps_data_merged_unmatched$judet.gps == county, ]$town.gps,
      "^[a-zA-Z]"
    ))
    # Cheap pre-filter on the first letter of the school's own town. This
    # previously referenced `town`, which resolved to the leftover variable of
    # the town loop above instead of the `town_ms_adm` column.
    lms_county <- ms_unmatched %>%
      filter(judet_ms == county & n_schools == 1 &
               str_extract(town_ms_adm, "^[a-zA-Z]") %in% gps_initials)

    per_town <- lapply(unique(lms_county$town_ms_adm), function(ms_town) {
      lms <- ms_unmatched %>% filter(judet_ms == county & town_ms_adm == ms_town)
      rms <- gps_data_merged_unmatched %>%
        filter(judet.gps == county & (stringdist(town.gps, ms_town) < 3))
      # Cartesian product; dist = 999 marks these as town-based matches.
      base::merge(lms, rms, by = NULL) %>% mutate(dist = 999)
    })
    bind_rows(per_town)
  })

  ms_matched_temp <- bind_rows(ms_matched_temp)
  # bind_rows() of nothing yields a 0 x 0 table without a `dist` column.
  if (nrow(ms_matched_temp) + ncol(ms_matched_temp) > 0) {
    ms_matched_temp <- ms_matched_temp %>% filter(!is.na(dist)) %>% arrange(-dist)
    ms_matched <- bind_rows(ms_matched, ms_matched_temp)
    ms_unmatched <- anti_join(ms, ms_matched, by = match_key)
  }

  # ------------------------------------------------ merge codes and save ----
  codes <- ms_matched %>%
    select(an_adm, judet_ms, scoala_de_provenienta, scoala_de_provenienta_orig,
           Cod_SIIIR, Cod_SIRUES, Cod_SIRUTA, lat.gps, lon.gps) %>%
    rename(Cod_SIIIR_ms_adm = Cod_SIIIR, Cod_SIRUES_ms_adm = Cod_SIRUES,
           Cod_SIRUTA_ms_adm = Cod_SIRUTA, lat_ms_adm = lat.gps, lon_ms_adm = lon.gps)

  setwd(wd_data_intermediate)
  codes_key <- c("an_adm", "judet_ms", "scoala_de_provenienta", "scoala_de_provenienta_orig")
  data_full <- base::merge(data_full, codes, by.x = codes_key, by.y = codes_key, all.x = TRUE)
  saveRDS(data_full, paste0("data_adm_hs_ms_siiir_", year, ".rds"))
  message(paste('Observations after:', nrow(data_full)))

  # Share of observations with a non-empty school name that received a code.
  temp <- data_full %>%
    filter(scoala_de_provenienta != '' & !is.na(scoala_de_provenienta))
  num <- nrow(temp %>% filter(!is.na(Cod_SIIIR_ms_adm)))
  denom <- nrow(temp)
  match_rate <- num / denom * 100
  print(paste0(match_rate, "% of observations now have a SIIIR code."))

  result <- list(data_full, codes)
  return(result)
}

